/* 
This file produces the final dataset used for the majority of analysis (5_firm_mn_final.dta)

*/ 


#delimit;

* Session setup - reset memory, raise the matrix size limit and turn off output paging;
clear all;
set matsize 1000;
set more off;

* Set directory - the relative ../dta_secure paths used below resolve from here;
cd "~/Desktop/migrec_replication/do/";

* Load the input panel 4_firm_mn.dta;
use ../dta_secure/4_firm_mn.dta, clear;


** sample restrictions ;

* restrict to 2012 sample;
* i.e. license year is <= 2008 and non-missing migrants in 2009;
* A missing license year fails the <= comparison and is therefore dropped too;
drop if !(agency_license_yr <= 2008) ;

* elig_migs must be neither zero nor system missing;
keep if !(elig_migs == 0 | elig_migs == .) ;


* complaint composition ;
* Each rate is a share of all complaints and is missing when complaints is zero or missing;

gen harass_rate = harass/complaints;
gen breach_rate = (breach + nonpay)/complaints;


*** merge in data on number of new employers and all employers;

* keep(1 3) retains master-only and matched rows. Without it, agency-years that;
* exist only in employer_chars.dta would be appended as brand-new observations,;
* bypassing the sample restrictions applied earlier in this file and then being;
* zero-filled further down. The other merges in this file already use keep(1 3);

merge m:1 agency_id year using ../dta_secure/employer_chars.dta,
 gen(new_emp_chars) keep(1 3);

* agency-years without an employer record get zero employers;
replace num_employers = 0 if num_employers == . ;
replace new_employer = 0 if new_employer == . ;


* agency-level employer characteristics (one using row per agency_id);
merge m:1 agency_id  using ../dta_secure/employer_chars_2.dta,
 gen(new_emp_chars2) keep(1 3);



* vars for sample selection;
* define comparison as elig migs who are just below 100 in 2009 ;
* Bands below are inclusive at the lower bound and exclusive at the upper bound.;
* A missing elig_migs fails the strict upper bound, so it is coded 0;

gen comp_50_100 = (elig_migs < 100 & elig_migs >= 50);
gen comp_75_100 = (elig_migs < 100 & elig_migs >= 75);
gen comp_25_175 = (elig_migs < 175 & elig_migs >= 25);


* comp_opt is inclusive at both ends - inrange() returns 0 when elig_migs is missing;
gen comp_opt = inrange(elig_migs, 45, 155);

* comp_opt2 - the strict upper bound already rules out missing values;
gen comp_opt2 = (elig_migs < 125 & elig_migs >= 75);


** job order quality;

* winsorised copy of the job order count (winsor is a user-written command);
winsor num_job_orders, p(0.025) gen(num_job_orders_win);


* produce mid point of salary;
* First fill gaps in the _d salary bounds from their usd_ counterparts;

foreach bound in min max {;
replace usd_`bound'salary_d = usd_`bound'salary
 if usd_`bound'salary_d == . & usd_`bound'salary != . ;
};

gen usd_mid_salary =  (usd_minsalary_d + usd_maxsalary_d)/2;

* group flag handed to normby() when the index is built below;
gen controlz = (year < 2010) & (comp_opt == 1) & (elig_firm == 0);

* blank the amenity and salary inputs in 2005 and 2014;
foreach v of varlist ticket_share_vac med_share_vac accom_share_vac
food_share_vac usd_mid_salary usd_minsalary_d usd_maxsalary_d {;
replace `v' = . if inlist(year, 2005, 2014);
};

* weightave2 is a user-written egen function - its call is kept verbatim;
* NOTE(review) - mean_salary_d_win enters the index but is not blanked in the loop above - confirm intended;
egen jo_quality = weightave2(ticket_share_vac med_share_vac accom_share_vac 
food_share_vac mean_salary_d_win usd_mid_salary ), normby(controlz); 

label var jo_quality "index of job order quality - salary and amenities";

** merge in domestic worker complaints and domestic worker salary data;
* Both merges are 1:1 on agency_id month year and keep master-only plus matched rows;

merge 1:1 agency_id month year using ../dta_secure/dw_complaints.dta, keep(1 3) gen(dw_comps);
merge 1:1 agency_id month year using ../dta_secure/dw_salary.dta, keep(1 3) gen(dw_salz);

* numbered copies manlev_1 to manlev_7 of the named manlev_* variables;
local k = 0;
foreach lev in professional skilled semiskill middle clerical unskilled dw {;
local ++k;
gen manlev_`k' = manlev_`lev';
};


* per-level complaint, breach and harassment rates, left missing when the level count is zero.;
* The level salary is blanked when the level count is zero or missing;
forvalues k = 1/7 {;

gen complaint_rate_manlev_`k' = complaint_manlev_`k'/manlev_`k' if manlev_`k' > 0;
gen breach_rate_manlev_`k' = breach_manlev_`k'/manlev_`k' if manlev_`k' > 0;
gen harass_rate_manlev_`k' = harass_manlev_`k'/manlev_`k' if manlev_`k' > 0;
replace salary_manlev_`k' = . if manlev_`k' == 0 | manlev_`k' == . ;
};



* migrant composition shares, each computed as the variable divided by migrant;
foreach v of varlist age1 age2 age3 age4 pre_complainants {;

local s `v'_share;
gen `s' = `v'/migrant;
* a missing ratio with a non-missing denominator is treated as a zero share;
replace `s' = 0 if migrant != . & `s' == . ;
* the share is left undefined when there are no migrants at all;
replace `s' = . if migrant == 0 ;
};

label variable age1_share "share of migrants under 30";
label variable age2_share "share of migrants between 30 and 40";
label variable age3_share "share of migrants between 40 and 50";
label variable age4_share "share of migrants over 50";
label var pre_complainants_share "share of migrants who had previously registered
a complaint";


* existing mig_share_* variables are set to zero when there are no migrants;
foreach v of varlist mig_share_* {;
replace `v' = 0 if migrant == 0;
};


** CLEAN VARS;

* 1. vars that must be zero post license;

** a. migration data;
* impose zero to make a balanced panel;
* i.e. if license year is 2009 then zero migrants before that;
* The original code used two replaces, one for year <= agency_license_yr and one;
* for year > agency_license_yr. Together those conditions cover every observation,;
* so one zero-fill of system-missing values gives the same result;

foreach v of varlist
 firm_exit_1yr firm_exit_2yr firm_exit_6mo
 pre_complainants
 rev_d_sinh partial_qual_marks_mn migrant
 renew_emp_or_self women
 manlev_professional manlev_clerical manlev_middle manlev_skilled
 manlev_semiskill manlev_unskilled manlev_dw
 domestic_worker saudi complaints harass breach nonpay
 renew_emp_or_self_share mean_salary_d_win
 renew_agency renew_agency_emp renew_emp renew_self
 mig_share_women pre_complainants_share age1_share age4_share mig_share_saudi
 {;

replace `v' = 0 if `v' == . ;

 };
 
 
 ** b. job order data;
* Zero-fill system-missing values, then blank 2005 and 2014 again.;
* The original two replaces (year <= agency_license_yr and year > agency_license_yr);
* jointly cover every observation, so a single zero-fill is equivalent;

foreach v of varlist
 jo_quality num_employers new_employer num_job_orders num_vac_req
 num_vac_req_skilltype2
 num_vac_req_construction num_vac_req_domestic num_vac_req_retail
 mean_vac_req
 num_jo_good_emp_pre2009_25 num_jo_good_emp_pre2009 num_jo_good_emp_pre2009_10
 num_jo_bad_emp_pre2009_25 num_jo_bad_emp_pre2009 num_jo_bad_emp_pre2009_10
 ticket_share_vac med_share_vac accom_share_vac
 food_share_vac usd_mid_salary usd_minsalary_d usd_maxsalary_d
 num_job_orders_win good_employers bad_employers
 {;

replace `v' = 0 if `v' == . ;

* these variables are always missing in 2005 and 2014, whatever the zero-fill did;
replace `v' = . if inlist(year, 2005, 2014);

};


* row means of migrant_06 to migrant_08 and of migrant_06 to migrant_09;
egen migrants_0608 = rowmean(migrant_06 migrant_07 migrant_08);
egen migrants_0609 = rowmean(migrant_06 migrant_07 migrant_08 migrant_09);

* cut each multi-year average into 10 groups;
foreach w in 0508 0608 0609 {;
egen migrants_`w'_bin = cut(migrants_`w'), group(10);
};

* NOTE(review) - the source variable here is migrant_0709 (no s), unlike the three above - confirm the name;
egen migrants_0709_bin = cut(migrant_0709), group(10);


** indicator for types;
* String label combining eligibility with the high and low score flags.;
* Statement order matters - a later line overwrites an earlier one if both flags are set;

gen ag_type = "high elig" if high_score == 1 & elig_firm == 1;
replace ag_type = "low elig" if low_score == 1 & elig_firm == 1;
replace ag_type = "high comp" if high_score == 1 & elig_firm == 0;
replace ag_type = "low comp" if low_score == 1 & elig_firm == 0;



* misc;

* winsorised copy of migrant (user-written winsor command);
winsor migrant, gen(migrant_t975) p(0.025);


* despite its name, mig_act equals 1 when migrant is zero;
gen mig_act = (migrant == 0);

label variable mig_act "Zero Migrants recruited in a month-year ";

* vacancy composition - each category as a share of num_vac_req;
* (missing when num_vac_req is zero or missing);

* skilltype2 vacancies are stored directly under the drivers name;
gen num_vac_req_drivers_share = num_vac_req_skilltype2/num_vac_req ;

foreach cat in construction domestic retail {;
gen num_vac_req_`cat'_share = num_vac_req_`cat'/num_vac_req ;
};

* generate more stable quality measures;
* Each measure averages partial_qual_marks over the month 5 rows of the listed years;
* within agency, then max() copies that value to every row of the agency;

by agency_id, sort: egen qs_0709_tmp = mean(partial_qual_marks) if
 month == 5 & inlist(year, 2007,2008,2009) ;
by agency_id, sort: egen qs_0709 = max(qs_0709_tmp) ;

label var qs_0709 "mean yrly quant score 07-09";


* single-year version - the month 5 value of 2009;
gen qs_09_tmp = partial_qual_marks if month == 5 & year == 2009 ;
by agency_id, sort: egen qs_09 = max(qs_09_tmp) ;

label var qs_09 "quant score 09";


* remaining windows share one template - the year lists are spelled out per window;
local yrs_0508 2005, 2006, 2007, 2008;
local yrs_0608 2006, 2007, 2008;
local yrs_0609 2006, 2007, 2008, 2009;

foreach w in 0508 0608 0609 {;

local y0 = substr("`w'", 1, 2);
local y1 = substr("`w'", 3, 2);

by agency_id, sort: egen qs_`w'_tmp = mean(partial_qual_marks) if
 month == 5 & inlist(year, `yrs_`w'') ;
by agency_id, sort: egen qs_`w' = max(qs_`w'_tmp) ;

label var qs_`w' "mean yrly quant score `y0'-`y1'";

};


* Split agencies at the median of each score measure and build the high (h_) and;
* low (l_) indicators plus their interactions with the timing dummies, with and;
* without eligibility;

local timing_a post post_2009;
local timing_b postm postm_2009 postmp_2009;

foreach x of varlist  qs_0609 qs_0608 qs_0508 qs_0709 qs_09 {;

* median is taken over the year 2009, month 2 observations;
sum `x' if year == 2009 & month == 2, det;

* Stata ranks missing values above every number, so an unguarded (x > median);
* would classify agencies with no score as high. Both indicators are left;
* missing for those agencies so that h_ and l_ always sum to one when defined;
gen h_`x' = (`x' > `r(p50)') if !missing(`x');
gen l_`x' = (`x' <= `r(p50)') if !missing(`x');

* within each timing group - eligibility interactions first, then plain interactions;
foreach grp in timing_a timing_b {;

foreach t of local `grp' {;
gen h_`x'_elig_`t' = h_`x'*`t'*elig_firm;
gen l_`x'_elig_`t' = l_`x'*`t'*elig_firm;
};

foreach t of local `grp' {;
gen h_`x'_`t' = h_`x'*`t';
gen l_`x'_`t' = l_`x'*`t';
};

};

};

*** make variables for mean reversion tests;

* year dummies and eligibility-by-year interactions for 2005 through 2015;
forvalues yr = 2005/2015 {;

gen year_`yr' = (year == `yr');
gen elig_firm_`yr' = elig_firm*year_`yr' ;

};


* score-group-by-year interactions, with and without eligibility;
forvalues yr = 2005/2015 {;

gen high_elig_`yr' = high_score*elig_firm*year_`yr' ;
gen low_elig_`yr' = low_score*elig_firm*year_`yr' ;

gen high_`yr' = high_score*year_`yr' ;
gen low_`yr' = low_score*year_`yr' ;

* same interactions for the median splits of qs_0709 and qs_09;
foreach q in qs_0709 qs_09 {;

gen h_`q'_elig_`yr' = h_`q'*elig_firm*year_`yr';
gen l_`q'_elig_`yr' = l_`q'*elig_firm*year_`yr';

gen h_`q'_`yr' = h_`q'*year_`yr';
gen l_`q'_`yr' = l_`q'*year_`yr';

};

};

* Cut each score average, and the predicted score, into 10 groups;
* The label for qs_0608_bin previously read 4-yr although 2006-08 spans three;
* years - the window length in each label now matches its variable;

local span_0508 "4-yr";
local span_0608 "3-yr";
local span_0609 "4-yr";
local span_0709 "3-yr";

foreach w in 0508 0608 0609 0709 {;

egen qs_`w'_bin = cut(qs_`w'), group(10) ;

label var qs_`w'_bin "group `span_`w'' avg into 10 bins";

};


egen qs_09_bin = cut(qs_09), group(10) ;

label var qs_09_bin "group 09 avg into 10 bins";


egen predicted_score_bin = cut(predicted_score), group(10) ;

label var predicted_score_bin "group predicted score into 10 bins";



* misc ;

* within the comp_opt band, recode pre-2010 one-year exits from 1 to 0;
replace firm_exit_1yr = 0 if comp_opt == 1 & year < 2010 & firm_exit_1yr == 1 ;



* drop fraudulent  agency_id = 30044 -- this agency is responsible for the;
* 3 largest monthly recruitment totals in the entire dataset and does not have;
* corresponding job order data;

keep if agency_id != 30044;
 
* create yearly avg. recruitment var;

* total of migrant over the 2005 rows of each agency, copied to every row of the;
* agency (zero when the agency has no 2005 rows);
by agency_id, sort: egen migrantz_05 = sum(migrant) if year == 2005 ;
replace migrantz_05 = 0 if migrantz_05 == . ;
by agency_id, sort: egen migrant_05 = max(migrantz_05) ;

* row means of the yearly variables for 05-08 and 05-09, each also cut into 10 groups;
local parts_0508 migrant_05 migrant_06 migrant_07 migrant_08;
local parts_0509 `parts_0508' migrant_09;

foreach w in 0508 0509 {;

local y0 = substr("`w'", 1, 2);
local y1 = substr("`w'", 3, 2);

egen migrants_`w'y = rowmean(`parts_`w'');
label var migrants_`w'y "yearly avg recruitment between `y0'-`y1'";
egen migrants_`w'y_bin = cut(migrants_`w'y), group(10);

};
 
 
* create biannual var;
* bimonth is a half-year marker - 1 for months before 7, 2 from month 7 on;
* (a missing month compares as >= 7 and therefore lands in 2);
gen bimonth = cond(month >= 7, 2, 1);

* one running group code per year and half-year combination;
egen biannual = group(year bimonth) ;


* code as missing variables that make no sense as a zero;
* e.g. if there are no job orders, makes no sense to say avg contract;
* quality is zero;
* The rule applied below is - blank the value when migrant is zero in a pre-2010 row;

foreach v of varlist
 mean_salary_d_win jo_quality
 new_employer num_employers
 num_jo_good_emp_pre2009_25 num_jo_bad_emp_pre2009_25
 renew_emp_or_self_share
 renew_agency_share renew_agency_emp_share renew_emp_share renew_self_share
 mig_share_women pre_complainants_share age1_share age4_share mig_share_saudi
 ticket_share_vac med_share_vac accom_share_vac food_share_vac usd_minsalary_d
 {;

replace `v' = . if year < 2010 & migrant == 0;

};



* generate vars for job order composition robustness;
* For each count this builds three variants;
*   _m    the count, blanked when migrant is zero in a pre-2010 row;
*   _s    the count as a share of num_job_orders, zero when there are no job orders;
*   _s_m  the share, blanked under the same rule as _m;

foreach v of varlist
 num_jo_bad_emp_pre2009 num_jo_bad_emp_pre2009_25 num_jo_bad_emp_pre2009_10
 num_jo_good_emp_pre2009 num_jo_good_emp_pre2009_25 num_jo_good_emp_pre2009_10 {;

gen `v'_m = `v' if !(migrant == 0 & year < 2010);

gen `v'_s = `v' / num_job_orders;
replace `v'_s = 0 if num_job_orders == 0;

gen `v'_s_m = `v'_s if !(migrant == 0 & year < 2010);

};



*misc;

* create pre-program share of migrants who are domestic worker;
* Mean of domestic_worker_share over the 2005-2009 rows of each agency, then;
* copied to every row of the agency through max(). The helper is dropped afterwards;

by agency_id, sort: egen dom_share_pre_tmp = mean(domestic_worker_share) if
 inlist(year,2005,2006,2007, 2008, 2009);

by agency_id, sort: egen dom_share_pre = max(dom_share_pre_tmp);
drop dom_share_pre_tmp;

label variable dom_share_pre "Share of domestic workers pre-program";

* display labels for the main outcome variables;
label variable migrant "\# Migrants";
label variable women "\# Female Migrants";
label variable domestic_worker "\# Domestic Worker Migrants";
label variable saudi "\# Migrants to Saudi Arabia";
label variable mean_salary_d_win "Salary (USD), 2015 dollars";
label variable num_job_orders_win "\# Job Orders";
label variable num_vac_req "\# Vacancies";
label variable complaints "\# Complaints";
label variable comp_int "Complaint rate";
label variable solved_rate "Complaint solved rate";
label variable solv_months "Complaint solved time";

* _m variants - the complaint rate is blanked when migrant is zero, and the two;
* resolution measures are also blanked when filed_complaints is zero;
gen comp_int_m = comp_int if migrant != 0;

gen solved_rate_m = solved_rate if !(migrant == 0 | filed_complaints == 0);

gen solved_months_m = solv_months if !(migrant == 0 | filed_complaints == 0);

label variable comp_int_m "Complaint rate";
label variable solved_rate_m "Complaint solved rate";
label variable solved_months_m "Complaint solved time";
	

* variables needed for rdd ;
* year dummies yr_13, yr_14 and yr_15 for 2013, 2014 and 2015;

forvalues yy = 13/15 {;
gen yr_`yy' = (year == 20`yy');
};



** HETEROGENEOUS EFFECTS FOR LOW QUALITY AGENCIES;

* first, make variables needed for heterogeneous effects;


gen pre_good = good_employers;
gen age_09 = firm_age_09 ;
gen density = elig_100ft;



* For each characteristic, flag values above the median among low_score agencies;
* (_ab) and interact that flag with the eligibility and timing variables;

foreach x of varlist age_09 density pre_good {;

sum `x' if low_score == 1 , det;

* Stata ranks missing values above every number, so an unguarded (x > median);
* would flag rows with a missing characteristic as above-median. This matters;
* for pre_good, because good_employers is blanked for 2005 and 2014 earlier in;
* this file. The flag and its interactions are left missing there instead;
gen `x'_ab = (`x' > `r(p50)') if !missing(`x');

gen `x'_ef_postmp_09 = elig_firm_postmp_2009*`x'_ab ;
gen `x'_ef_postm = elig_firm_postm*`x'_ab;
gen `x'_ef_postm_09 = elig_firm_postm_2009*`x'_ab;
gen `x'_postmp = postmp_2009*`x'_ab ;
gen `x'_postm = postm*`x'_ab;
gen `x'_postm_09 = postm_2009*`x'_ab;
gen `x'_elig = elig_firm*`x'_ab;

};



** variables examining investment in rating criteria;
* generate a star rating that incorporates investments in quant score;

* agency-level maximum of partial_qual_marks over the 2011 rows of the agency;
gen quality_control_11_tmp = partial_qual_marks if year == 2011;
by agency_id, sort: egen quality_control_11 = max(quality_control_11_tmp);


* sum of the audit marks and the 2011 score, rescaled by 4/7;
gen performance_rate_11 = (4/7)*(quality_control_11 + rp_partial_audit_marks);


* give audit criteria points;
* Letter grades map to points - A is 5, B is 4, C is 3, D is 2, E is 1 and Z is 0.;
* Any other value of the grade variable leaves the points missing;


foreach crit in lg_f1_renewal lg_f2_bank_guaranty lg_f3_book_a lg_f3_book_b lg_f3_book_c
		lg_f3_book_d lg_f3_receipt lg_f3_passport lg_f4_1a lg_f4_1b_nameboard
		lg_f4_1c_appearance lg_f4_2a_office_area lg_f4_2b_process_mgmt lg_f4_2c_equip
		lg_f6_i_office_staff lg_f6_ii_id_cards lg_f7_i_bio_data lg_f7_ii_awareness
		lg_g1_higher_ed lg_g2_advanced_ed lg_g3_ordinary_ed lg_g4_lower_ed lg_e_cess_payment
		lg_extra_bonus {;

		gen xx_`crit' = . ;

		* walk down the grade scale, starting from 5 points for an A;
		local pts = 5;
		foreach grade in A B C D E Z {;
			replace xx_`crit' = `pts' if `crit' == "`grade'";
			local --pts;
		};
	};

* total over all criteria - missing when every component is missing;
	egen rp_partial_audit_marks_check = rowtotal(xx_*), missing;


* sub-totals by criteria group (rowtotal treats missing components as zero);

egen record_keeping = rowtotal(xx_lg_f1_renewal xx_lg_f2_bank_guaranty
		xx_lg_f3_book_a xx_lg_f3_book_b xx_lg_f3_book_c
		xx_lg_f3_book_d xx_lg_f3_receipt xx_lg_f3_passport);


egen marketing = rowtotal(xx_lg_f4_1a xx_lg_f4_1b_nameboard
		xx_lg_f4_1c_appearance xx_lg_f4_2a_office_area
		xx_lg_f4_2b_process_mgmt xx_lg_f4_2c_equip
		xx_lg_f6_i_office_staff xx_lg_f6_ii_id_cards);

egen staff = rowtotal(xx_lg_g1_higher_ed xx_lg_g2_advanced_ed
		xx_lg_g3_ordinary_ed xx_lg_g4_lower_ed);

egen other = rowtotal(xx_lg_e_cess_payment xx_lg_extra_bonus);

* comp_opt band (45 to 155 inclusive, missing coded 0) restricted to license year <= 2008;
gen comp_opt_09 = inrange(elig_migs, 45, 155) & (agency_license_yr <= 2008);



* write the final analysis dataset;
save ../dta_secure/5_firm_mn_final, replace;